##################################################################
# Responsive Rhetoric: Evidence from Congressional Redistricting #
# Jaclyn Kaslovsky and Michael Kistner
# Main Regression Code
##################################################################

###############################################
# Load Packages and Set the Working Directory #
###############################################

library(broom)
library(cowplot)
library(fixest)
library(ggpubr)
library(lubridate)
library(magrittr)
library(modelsummary)
library(tictoc)
library(tidyverse)
library(quanteda)
library(quanteda.textmodels)
library(quanteda.textstats)

# setwd("~/Dropbox/Redistricting, Ideology, and Communication/Replication Files/Data")

# Specify options 
options("modelsummary_format_numeric_latex" = "plain")

####################
# Load in the data #
####################

load("Replication Files/Data/District and Member Changes (116th and 117th).Rda")
load("Replication Files/Data/Tweets by Members of Congress (116th and 117th).Rda")

###############################
# Summary Stats              #
###############################

# Median number of tweets during the 117th
member_tweets %>%
  filter(Date > mdy("01-03-2021")) %>%
  group_by(MemberName) %>%
  summarize(n_tweets = n()) %>% 
  pull(n_tweets) %>%
  median()

# Min number of tweets
member_tweets %>%
  filter(Date > mdy("01-03-2021")) %>%
  group_by(MemberName) %>%
  summarize(n_tweets = n()) %>%
  filter(n_tweets == min(n_tweets))

# Max number of tweets
member_tweets %>%
  filter(Date > mdy("01-03-2021")) %>%
  group_by(MemberName) %>%
  summarize(n_tweets = n()) %>%
  filter(n_tweets == max(n_tweets))

# Districts in which Cook PVI changed by at least 5 percentage points
prepost_df %>%
  mutate(change = ifelse(abs(CookPVI_Change)>=5,1,0)) %>%
  pull(change) %>%
  mean()


###############################
# Table 1                     #
###############################

# Effect of Cook PVI change on tweet ideology change
model_1 <- feols(TweetExtremism_Change ~ CookFavorability_Change + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Effect of Cook PVI change on tweet ideology change (Democrats)
model_2 <- feols(TweetExtremism_Change ~ CookFavorability_Change,
                 vcov = "hetero",
                 data = filter(prepost_df, Party == "D", RanForReelection == 1))

# Effect of Cook PVI change on tweet ideology change (Republicans)
model_3 <-  feols(TweetExtremism_Change ~ CookFavorability_Change,
                  vcov = "hetero",
                  data = filter(prepost_df, Party == "R", RanForReelection == 1))

# Effect of 21/22 Cook PVI on tweet ideology in the 117th
model_4 <- feols(TweetExtremism_117 ~ CookFavorability_116 + 
                   CookFavorability_117 + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, RanForReelection == 1))

# Effect of 21/22 Cook PVI on tweet ideology in the 116th
model_5 <- feols(TweetExtremism_116 ~ CookFavorability_116 + 
                   CookFavorability_117 + Party,
                 vcov = "hetero",
                 data = filter(prepost_df, 
                               RanForReelection == 1,
                               !(is.na(TweetExtremism_117))))

# Calculate ratio (accountability % of total effect)
coef(model_4)[3] / coef(model_5)[2]

# Create table of results
modelsummary(list("Change" = model_1,
                  "Change" = model_2,
                  "Change" = model_3,
                  "117th" = model_4,
                  "116th" = model_5),
             output = "latex",
             fmt=4,
             stars= c('+' = .10,
                      '*' = .05,
                      '**' = .01),
             statistic = "({std.error})",
             estimate  = "{estimate}{stars}",
             coef_map = c("CookFavorability_Change" = "District Partisanship Change",
                          "CookFavorability_116" = "2020 District Partisanship",
                          "CookFavorability_117" = "2022 District Partisanship",
                          "PartyR" = "Republican"),
             add_rows = data.frame(
               term = c("Sample"),
               model_1 = c("Both Parties"),
               model_2 = c("Democrats"),
               model_4 = c("Republicans"),
               model_1 = c("Both Parties"),
               model_1 = c("Both Parties")
             ),
             gof_map = c("nobs", "adj.r.squared"),
             note = "Robust standard errors shown in parentheses. +p<0.10, *p<0.05; **p<0.01")


###############################
# Figure 3                   #
###############################

### Plotting the Relationship Between Tweet and Roll Call Extremism Change ###
prepost_df %>%
  filter(abs(CookPVI_Change) >= 5,
         !(is.na(Dim1_Change)),
         !(is.na(TweetSlantR_Change))) %>% 
  mutate(Party = ifelse(Party == "R", "Republicans", "Democrats")) %>%
  ggplot(aes(x = VoteExtremism_Change, y = TweetExtremism_Change)) +
  geom_smooth(method = "lm", color = "black", alpha = 0.3) +
  geom_point(shape = 1) +
  stat_cor(method = "pearson") +
  labs(x = "Change in W-NOMINATE Score \n (Higher Values More Extreme)",
       y = "Change in Tweet Partisan Extremism \n (Higher Values More Extreme)") +
  facet_wrap(~Party, scales = "free") + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank()
  )


###############################
# Figure 4                   #
###############################

### Comparing Effect Sizes of Redistricting on Votes and Tweets ###
graphing_data <- prepost_df %>%
  select(Dim1_116, TweetSlantR_116, Party) %>%
  pivot_longer(cols = c(Dim1_116, TweetSlantR_116),
               names_to = "Measure", 
               values_to = "Value") %>%
  mutate(Measure = ifelse(Measure == "Dim1_116", "Estimated Effect of Redistricting on W-NOMINATE Score", 
                          "Estimated Effect of Redistricting on Tweet Partisan Extremity"),
         Measure = as.factor(Measure),
         Party = ifelse(Party == "R", "Republicans", "Democrats")) 

wnom_plot <- graphing_data %>% 
  filter(Measure == "Estimated Effect of Redistricting on W-NOMINATE Score") %>%
  ggplot(aes(x = Value, fill = Party, shape = Party)) +
  geom_density(aes(lty = Party), alpha = 0.3) + 
  scale_fill_manual(values = c("navy", "darkred")) +
  geom_curve(
    aes(x = median(Value[Party == "Democrats"], na.rm = TRUE), y = 1, 
        xend = median(Value[Party == "Democrats"], na.rm = TRUE) + 5 * 0.0029, yend = 1),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Democrats"], na.rm = TRUE), y = 1, 
        xend = median(Value[Party == "Democrats"], na.rm = TRUE) - 5 * 0.0029, yend = 1),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Republicans"], na.rm = TRUE), y = 1, 
        xend = median(Value[Party == "Republicans"], na.rm = TRUE) + 5 * 0.0028, yend = 1),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Republicans"], na.rm = TRUE), y = 1, 
        xend = median(Value[Party == "Republicans"], na.rm = TRUE) - 5 * 0.0028, yend = 1),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  labs(y = "Density", 
       x = "W-NOMINATE Score (116th Congress)",
       fill = NULL, lty = NULL) +
  facet_wrap(~Measure) + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = c(0.4, 0.8),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

tweet_plot <- graphing_data %>% 
  filter(Measure != "Estimated Effect of Redistricting on W-NOMINATE Score") %>%
  ggplot(aes(x = Value, fill = Party, shape = Party)) +
  geom_density(aes(lty = Party), alpha = 0.3) + 
  scale_fill_manual(values = c("navy", "darkred")) + 
  scale_shape_manual(values = c(21, 25)) +
  geom_curve(
    aes(x = median(Value[Party == "Democrats"], na.rm = TRUE), y = 2.5, 
        xend = median(Value[Party == "Democrats"], na.rm = TRUE) + 5 * 0.0014, yend = 2.5),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Democrats"], na.rm = TRUE), y = 2.5, 
        xend = median(Value[Party == "Democrats"], na.rm = TRUE) - 5 * 0.0014, yend = 2.5),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Republicans"], na.rm = TRUE), y = 2.5, 
        xend = median(Value[Party == "Republicans"], na.rm = TRUE) + 5 * 0.0028, yend = 2.5),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  geom_curve(
    aes(x = median(Value[Party == "Republicans"], na.rm = TRUE), y = 2.5, 
        xend = median(Value[Party == "Republicans"], na.rm = TRUE) - 5 * 0.0028, yend = 2.5),
    arrow = arrow(length = unit(0.02, "npc"),
                  angle = 35, type = "closed"),
    curvature = 0
  ) +
  labs(y = "Density", 
       x = "Tweet Partisan Extremity (116th Congress)", 
       fill = NULL, lty = NULL) +
  facet_wrap(~Measure) + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )

plot_grid(tweet_plot, wnom_plot, 
          nrow = 2, 
          rel_heights = c(0.5, 0.55))


###############################
# Figure 5                   #
###############################

# Set seed
set.seed(02138)

#  Add variables on redistricting change to tweet data
member_tweets <- prepost_df %>%
  filter(!(is.na(Dim1_Change))) %>%
  mutate(RedistrictedCategories =
           case_when(
             CookPVI_Change >= 5 ~ "More Partisan",
             CookPVI_Change <= -5 ~ "More Moderate",
             TRUE ~ "No Significant Change"
           )) %>%
  select(ICPSR, RedistrictedCategories) %>%
  left_join(member_tweets, ., by = c("MemberICPSR" = "ICPSR")) %>%
  filter(RedistrictedCategories != "No Significant Change")

# Create document corpus from tweets
tweet_corpus <- member_tweets %>%
  mutate(Text = str_replace_all(TextExpanded, "[/-]", " "), # extract words from URLs
         MemberSession = str_c(MemberICPSR, Session, "-"),
         DocID = TweetID,
         TweetText = TextExpanded,
         Group = str_c(MemberParty, "-", RedistrictedCategories, "-", Session)) %>% 
  select(DocID, TweetID, TweetText, Date, Session, MemberName, MemberICPSR, 
         MemberSession, MemberParty, RedistrictedCategories, Group,
         RetweetCount, ReplyCount, LikeCount, QuoteCount, TextExpanded) %>% 
  group_by(MemberSession) %>%
  mutate(TweetCount = n()) %>%
  ungroup() %>%
  corpus(text_field = "TextExpanded", 
         docid_field = "TweetID")

# Convert corpus into document-feature matrix
tweet_dfm <- tweet_corpus %>%
  tokens(remove_punct = TRUE,
         remove_symbols = TRUE,
         remove_numbers = FALSE,
         remove_url = FALSE, 
         split_hyphens = FALSE,
         split_tags = FALSE) %>%
  tokens_tolower() %>%
  tokens_select(min_nchar=2L) %>% 
  tokens_select(pattern = stopwords("en"), 
                selection = "remove") %>% 
  tokens_ngrams(n = c(1)) %>%
  dfm() %>%
  dfm_trim(min_docfreq = 0.0005, docfreq_type = "prop")

# Convert to matrix for Democrats
dem_dfm <- tweet_dfm %>%
  dfm_subset(MemberParty == "D") %>%
  dfm_group(Group) %>%
  as.matrix()

# Convert to matrix for Republicans
rep_dfm <- tweet_dfm %>%
  dfm_subset(MemberParty == "R") %>%
  dfm_group(Group) %>%
  as.matrix()

# Remove infrequently used words
rep_dfm <- rep_dfm[ , apply(rep_dfm, 2, function(x) ifelse(min(x) < 225, FALSE, TRUE))]
dem_dfm <- dem_dfm[ , apply(dem_dfm, 2, function(x) ifelse(min(x) < 225, FALSE, TRUE))]

# Estimate diff-in-diff model for Democrats 
d_did <- (((dem_dfm[4,] - dem_dfm[3,]) / dem_dfm[3,]) -
            ((dem_dfm[2,] - dem_dfm[1,]) / dem_dfm[1,])) 

# Estimate diff-in-diff model for Republicans
r_did <- (((rep_dfm[4,] - rep_dfm[3,]) / rep_dfm[3,]) -
            ((rep_dfm[2,] - rep_dfm[1,]) / rep_dfm[1,]))

# Put data into form for graphing for Democrats
graphing_data_d <- data.frame(
  feature = names(d_did),
  did = d_did) %>%
  filter(str_detect(feature, "[a-zA-Z]"))

# Create graph for Democrats
graphing_data_d <- graphing_data_d %>%
  arrange(did) %>%
  mutate(Party = "Democrats",
         feature = str_to_title(feature),
         feature = case_when( # fix capitalization/punctuation for specific features
           feature == "Rep" ~ "Rep.",
           feature == "Icymi" ~ "ICYMI",
           feature == "@Potus" ~ "@POTUS",
           feature == "@Housegop" ~ "@HouseGOP",
           TRUE ~ feature)) %>%
  select(feature, 
         did, Party) %>%
  slice(1:15, (nrow(graphing_data_d) - 14):nrow(graphing_data_d)) %>%
  bind_rows(
    data.frame(
      feature = "",
      did = 0,
      Party = "Democrats"
    )
  ) %>%
  mutate(feature = reorder(feature, did))

# Put data into form for graphing for Republicans
graphing_data_r <- data.frame(
  feature = names(r_did),
  did = r_did) %>%
  filter(str_detect(feature, "[a-zA-Z]"))

# Create graph for Republicans
graphing_data_r <- graphing_data_r %>%
  arrange(did) %>%
  mutate(Party = "Republicans",
         feature = str_to_title(feature),
         feature = case_when( # fix capitalization/punctuation for specific features
           feature == "Rep" ~ "Rep.",
           feature == "Icymi" ~ "ICYMI",
           feature == "@Potus" ~ "@POTUS",
           feature == "@Housegop" ~ "@HouseGOP",
           TRUE ~ feature)) %>%
  select(feature, # word_category, 
         did, Party) %>%
  slice(1:15, (nrow(graphing_data_r) - 14):nrow(graphing_data_r)) %>%
  bind_rows(
    data.frame(
      feature = "",
      did = 0,
      Party = "Republicans"
    )
  ) %>%
  mutate(feature = reorder(feature, did)) 

### Bootstrapping Inference ----------------------------------------------------
# Create Republican DFM
rep_dfm <- tweet_dfm %>%
  dfm_subset(MemberParty == "R")

# Create Democratic DFM
dem_dfm <- tweet_dfm %>%
  dfm_subset(MemberParty == "D")

# Bootstrapping loop
for(i in 1:1000){
  rep_boot <- rep_dfm %>%
    dfm_sample(size = nrow(rep_dfm),
               replace = TRUE) %>%
    dfm_group(Group) %>%
    as.matrix()
  
  dem_boot <- dem_dfm %>%
    dfm_sample(size = nrow(dem_dfm),
               replace = TRUE) %>%
    dfm_group(Group) %>%
    as.matrix()
  
  # Estimate the diff-in-diff model for Democrats and Republicans 
  d_bootdid <- (((dem_boot[4,] - dem_boot[3,]) / dem_boot[3,]) -
                  ((dem_boot[2,] - dem_boot[1,]) / dem_boot[1,])) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    setNames(c("feature", str_c("boot", i)))  %>%
    mutate(
      feature = str_to_title(feature),
      feature = case_when(
        feature == "Rep" ~ "Rep.",
        feature == "Icymi" ~ "ICYMI",
        feature == "@Potus" ~ "@POTUS",
        feature == "@Housegop" ~ "@HouseGOP",
        TRUE ~ feature)
    )
  
  r_bootdid <- (((rep_boot[4,] - rep_boot[3,]) / rep_boot[3,]) -
                  ((rep_boot[2,] - rep_boot[1,]) / rep_boot[1,])) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    setNames(c("feature", str_c("boot", i))) %>%
    mutate(
      feature = str_to_title(feature),
      feature = case_when(
        feature == "Rep" ~ "Rep.",
        feature == "Icymi" ~ "ICYMI",
        feature == "@Potus" ~ "@POTUS",
        feature == "@Housegop" ~ "@HouseGOP",
        TRUE ~ feature)
    )
  
  graphing_data_r <- graphing_data_r %>%
    left_join(r_bootdid,
              by = join_by(feature))
  
  graphing_data_d <- graphing_data_d %>%
    left_join(d_bootdid,
              by = join_by(feature))
  
  if(i %% 50 == 0){
    print(str_c("Completed ", i, " iterations."))
  }
}

# Calculate lower and upper bounds of 95% confidence intervals
graphing_data_r$lower <- graphing_data_r %>%
  select(-ends_with(".x")) %>%
  select(starts_with("boot")) %>%
  apply(1, function(x) quantile(x, 0.025, na.rm = TRUE))

graphing_data_r$upper <- graphing_data_r %>%
  select(-ends_with(".x")) %>%
  select(starts_with("boot")) %>%
  apply(1, function(x) quantile(x, 0.975, na.rm = TRUE))

graphing_data_d$lower <- graphing_data_d %>%
  select(-ends_with(".x")) %>%
  select(starts_with("boot")) %>%
  apply(1, function(x) quantile(x, 0.025, na.rm = TRUE))

graphing_data_d$upper <- graphing_data_d %>%
  select(-ends_with(".x")) %>%
  select(starts_with("boot")) %>%
  apply(1, function(x) quantile(x, 0.975, na.rm = TRUE))


### Creating Graph -------------------------------------------------------------
# Create plot for Republicans
r_plot <- graphing_data_r %>%
  mutate(feature = reorder(as.factor(feature), did)) %>%
  ggplot(aes(x = did, y = feature)) +
  geom_col(color = "black", fill = "white") +
  geom_point() +
  geom_point(aes(x = 0, y = ""), color = "white") +
  geom_vline(aes(xintercept = 0), lty = "dashed") + 
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0.3) +
  scale_x_continuous(labels = scales::percent,
                     breaks = c(-1.5, 0, 1.5, 3, 4.5, 6)) + 
  facet_wrap(~Party) +
  labs(x = "Used More by \n Members in More \n Moderate Districts", 
       y = NULL, fill = "Word Category") + 
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = c(0.78, 0.12),
    legend.box.background = element_rect(),
    axis.ticks.y = element_blank()
  )

# Create plot for Democrats
d_plot <- graphing_data_d %>%
  mutate(feature = reorder(as.factor(feature), did)) %>%
  ggplot(aes(x = did, y = feature)) +
  geom_col(color = "black", fill = "white") +
  geom_point() +
  geom_point(aes(x = 0, y = ""), color = "white") +
  geom_vline(aes(xintercept = 0), lty = "dashed") + 
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0.3) +
  scale_x_continuous(labels = scales::percent) + 
  facet_wrap(~Party) +
  labs(x = "Used More by \n Members in More \n Partisan Districts", 
       y = NULL) + 
  theme_bw()  +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none"
  )

# Combine plots
plot_grid(r_plot, d_plot, nrow = 1)


